2.1.1 Main model
Analysis not including the FEAST study - only severe malaria studies
# make numeric matrix for stan input
ind_SMstudies = dat_all$study != 'FEAST (Uganda)' &
!is.na(dat_all$log10_parasites)
X = dat_all[ind_SMstudies,
c('log10_platelet',
'log10_hrp2',
'log10_parasites')]
site_index_SMstudies = as.numeric(as.factor(dat_all$study[ind_SMstudies]))
table(site_index_SMstudies)
## site_index_SMstudies
## 1 2 3
## 170 484 1398
stan_dat_all =
list(N = nrow(X),
y = X,
Ntest = ncol(X),
K_sites = max(site_index_SMstudies),
site = site_index_SMstudies,
prior_mu = matrix(c(log10(75), log10(200),
log10(250),log10(3000),
3,5),
nrow = ncol(X), byrow = T),
prior_sigma_mu = matrix(c(0.1, 0.1,
0.2, 0.2,
0.25, 0.25),
nrow = ncol(X), byrow = T),
beta_prior_prev = matrix(data = c(19,1,
14,7,
14,7),
nrow = max(site_index_SMstudies),
byrow = T),
cluster = matrix(data = c(1,2,
2,1,
2,1),
nrow = ncol(X), byrow = T))
print(stan_dat_all$N)
## [1] 2052
# just platelets and HRP2 (reference)
ind_SMstudies2 = dat_all$study != 'FEAST (Uganda)'
X = dat_all[ind_SMstudies2,c('log10_platelet','log10_hrp2')]
stan_dat_plt_hrp2 = stan_dat_all
stan_dat_plt_hrp2$y = X
stan_dat_plt_hrp2$N = nrow(X)
stan_dat_plt_hrp2$site = as.numeric(as.factor(dat_all$study[ind_SMstudies2]))
stan_dat_plt_hrp2$Ntest = 2
stan_dat_plt_hrp2$prior_mu = stan_dat_all$prior_mu[1:2,]
stan_dat_plt_hrp2$prior_sigma_mu = stan_dat_all$prior_sigma_mu[1:2,]
stan_dat_plt_hrp2$cluster = stan_dat_all$cluster[1:2,]
compile the stan model
# compile stan model
lc_mod = stan_model(file = 'Latent_class_continuous_2.stan')
Run the stan model - this is stored in Rout We check convergence with the traceplots
fname_all = paste0('Rout/', 'mod2_LC_all.stanfit')
if(RUN_STAN | !file.exists(fname_all)){
out_all = sampling(lc_mod, data=stan_dat_all, thin=4)
save(out_all, file = fname_all)
} else {
load(fname_all)
}
fname_plt_hrp2 = paste0('Rout/', 'mod2_LC_plt_hrp2.stanfit')
if(RUN_STAN | !file.exists(fname_plt_hrp2)){
out_plt_hrp2 = sampling(lc_mod, data=stan_dat_plt_hrp2, thin=4)
save(out_plt_hrp2, file = fname_plt_hrp2)
} else {
load(fname_plt_hrp2)
}
traceplot(out_all,par=c('prev','mu','sigma'))
traceplot(out_plt_hrp2,par=c('prev','mu','sigma'))
thetas_all = extract(out_all)
thetas_plt_hrp2 = extract(out_plt_hrp2)
round(100*colMeans(thetas_all$prev))
## [1] 98 73 67
round(100*colMeans(thetas_plt_hrp2$prev))
## [1] 94 70 65
mean_vals = round(10^apply(thetas_all$mu, 2:3, mean))
colnames(mean_vals) = c('SM', 'not SM')
rownames(mean_vals) = c('Platelet count','PfHRP2','Parasite count')
print(mean_vals)
##
## SM not SM
## Platelet count 73 235
## PfHRP2 182 3247
## Parasite count 7584 78667
mean_vals2 = round(10^apply(thetas_plt_hrp2$mu, 2:3, mean))
colnames(mean_vals2) = c('SM', 'not SM')
rownames(mean_vals2) = c('Platelet count','PfHRP2')
print(mean_vals2)
##
## SM not SM
## Platelet count 70 231
## PfHRP2 196 3415
hist(colMeans(thetas_all$ps_SM),breaks = 20)
ind_SM = colMeans(thetas_all$ps_SM)>thresh_highSM
ind_notSM = colMeans(thetas_all$ps_SM)<thresh_lowSM
sum(ind_SM)
## [1] 1344
sum(ind_notSM)
## [1] 463
cor.test(stan_dat_all$y[ind_SM, 1],stan_dat_all$y[ind_SM,2])
##
## Pearson's product-moment correlation
##
## data: stan_dat_all$y[ind_SM, 1] and stan_dat_all$y[ind_SM, 2]
## t = -2.7007, df = 1342, p-value = 0.007007
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1264960 -0.0201302
## sample estimates:
## cor
## -0.07352218
cor.test(stan_dat_all$y[ind_SM, 2],stan_dat_all$y[ind_SM,3])
##
## Pearson's product-moment correlation
##
## data: stan_dat_all$y[ind_SM, 2] and stan_dat_all$y[ind_SM, 3]
## t = -4.3847, df = 1342, p-value = 1.252e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.17122533 -0.06578923
## sample estimates:
## cor
## -0.1188423
cor.test(stan_dat_all$y[ind_notSM, 1],stan_dat_all$y[ind_notSM,2])
##
## Pearson's product-moment correlation
##
## data: stan_dat_all$y[ind_notSM, 1] and stan_dat_all$y[ind_notSM, 2]
## t = 2.1778, df = 461, p-value = 0.02992
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.009874462 0.190294163
## sample estimates:
## cor
## 0.1009139
cor.test(stan_dat_all$y[ind_notSM, 2],stan_dat_all$y[ind_notSM,3])
##
## Pearson's product-moment correlation
##
## data: stan_dat_all$y[ind_notSM, 2] and stan_dat_all$y[ind_notSM, 3]
## t = 3.1177, df = 461, p-value = 0.001937
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.05326703 0.23179455
## sample estimates:
## cor
## 0.1436997
round(10^apply(thetas_plt_hrp2$mu, 2:3, mean))
##
## [,1] [,2]
## [1,] 70 231
## [2,] 196 3415
round(10^(apply(thetas_plt_hrp2$mu, 2:3, mean)+ 1.96*apply(thetas_plt_hrp2$sigma, 2:3, mean)))
##
## [,1] [,2]
## [1,] 280 822
## [2,] 5476 25279
round(10^(apply(thetas_plt_hrp2$mu, 2:3, mean)- 1.96*apply(thetas_plt_hrp2$sigma, 2:3, mean)))
##
## [,1] [,2]
## [1,] 18 65
## [2,] 7 461
Check conditional independence - are the biomarkers correlated within the latent classes?
par(mfrow=c(2,2), las=1)
plot(stan_dat_all$y[ind_SM, 1],stan_dat_all$y[ind_SM,2],
xlab='Platelet count',ylab='HRP2',main='True SM',
xlim=range(stan_dat_all$y[,1]),ylim=range(stan_dat_all$y[,2]),
col=dat_all$study_col[ind_SM],panel.first=grid())
legend('bottomright',col = unique(dat_all$study_col),
legend = unique(dat_all$study),pch=1,inset=0.03)
plot(stan_dat_all$y[ind_SM, 2],stan_dat_all$y[ind_SM,3],
xlab='HRP2', xlim=range(stan_dat_all$y[,2]),
ylim=range(stan_dat_all$y[,3]),
ylab='Parasite density',main='True SM',
col=dat_all$study_col[ind_SM],panel.first=grid())
plot(stan_dat_all$y[ind_notSM, 1],stan_dat_all$y[ind_notSM,2],
xlab='Platelet count',ylab='HRP2',main='Not SM',
xlim=range(stan_dat_all$y[,1]),ylim=range(stan_dat_all$y[,2]),
col=dat_all$study_col[ind_notSM],panel.first=grid())
plot(stan_dat_all$y[ind_notSM, 2],stan_dat_all$y[ind_notSM,3],
xlim=range(stan_dat_all$y[,2]),ylim=range(stan_dat_all$y[,3]),
xlab='HRP2',ylab='Parasite density',main='Not SM',
col=dat_all$study_col[ind_notSM],panel.first=grid())